<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @subpackage Fonts
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: SegmentToDelta.php 18993 2009-11-15 17:09:16Z alexander $
 */

/** Zend_Pdf_Cmap */
require_once 'Zend/Pdf/Cmap.php';


/**
 * Implements the "segment mapping to delta values" character map (type 4).
 *
 * This is the Microsoft standard mapping table type for OpenType fonts. It
 * provides the ability to cover multiple contiguous ranges of the Unicode
 * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
 *
 * @package    Zend_Pdf
 * @subpackage Fonts
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
{
	/**** Instance Variables ****/


	/**
	 * The number of segments in the table.
	 * @var integer
	 */
	protected $_segmentCount = 0;

	/**
	 * The size of the binary search range for segments, as recorded in the
	 * table header (converted from a byte count to a segment count).
	 *
	 * Kept for subclasses; lookups in this class do not depend on it.
	 * @var integer
	 */
	protected $_searchRange = 0;

	/**
	 * The number of binary search steps required to cover the entire search
	 * range, derived from the table header.
	 *
	 * Kept for subclasses; lookups in this class do not depend on it.
	 * @var integer
	 */
	protected $_searchIterations = 0;

	/**
	 * Array of ending character codes for each segment. Keys start at 1.
	 * @var array
	 */
	protected $_segmentTableEndCodes = array();

	/**
	 * The ending character code for the segment at the end of the low search
	 * range, or 0 when the recorded search range does not name a segment.
	 *
	 * Kept for subclasses; lookups in this class do not depend on it.
	 * @var integer
	 */
	protected $_searchRangeEndCode = 0;

	/**
	 * Array of starting character codes for each segment. Keys start at 1.
	 * @var array
	 */
	protected $_segmentTableStartCodes = array();

	/**
	 * Array of character code to glyph delta values for each segment. Keys
	 * start at 1; values are signed.
	 * @var array
	 */
	protected $_segmentTableIdDeltas = array();

	/**
	 * Array of offsets into the glyph index array for each segment. Keys start
	 * at 1; values are stored as 16-bit word counts, not byte counts.
	 * @var array
	 */
	protected $_segmentTableIdRangeOffsets = array();

	/**
	 * Glyph index array. Stores glyph numbers, used with range offset. Keys
	 * start at 0.
	 * @var array
	 */
	protected $_glyphIndexArray = array();



	/**** Public Interface ****/


	/* Concrete Class Implementation */

	/**
	 * Returns an array of glyph numbers corresponding to the Unicode characters.
	 *
	 * If a particular character doesn't exist in this font, the special 'missing
	 * character glyph' will be substituted.
	 *
	 * See also {@link glyphNumberForCharacter()}.
	 *
	 * @param array $characterCodes Array of Unicode character codes (code points).
	 * @return array Array of glyph numbers, using the same keys as the input.
	 */
	public function glyphNumbersForCharacters($characterCodes)
	{
		$glyphNumbers = array();
		foreach ($characterCodes as $key => $characterCode) {
			$glyphNumbers[$key] = $this->_lookupGlyphNumber($characterCode);
		}
		return $glyphNumbers;
	}

	/**
	 * Returns the glyph number corresponding to the Unicode character.
	 *
	 * If a particular character doesn't exist in this font, the special 'missing
	 * character glyph' will be substituted.
	 *
	 * See also {@link glyphNumbersForCharacters()} for bulk operations.
	 *
	 * @param integer $characterCode Unicode character code (code point).
	 * @return integer Glyph number.
	 */
	public function glyphNumberForCharacter($characterCode)
	{
		return $this->_lookupGlyphNumber($characterCode);
	}

	/**
	 * Returns an array containing the Unicode characters that have entries in
	 * this character map.
	 *
	 * Every code from each segment's start code through its end code
	 * (inclusive) is listed, in segment order.
	 *
	 * @return array Unicode character codes.
	 */
	public function getCoveredCharacters()
	{
		$characterCodes = array();
		for ($i = 1; $i <= $this->_segmentCount; $i++) {
			$endCode = $this->_segmentTableEndCodes[$i];
			for ($code = $this->_segmentTableStartCodes[$i]; $code <= $endCode; $code++) {
				$characterCodes[] = $code;
			}
		}
		return $characterCodes;
	}


	/**
	 * Returns an array containing the glyph numbers that have entries in this
	 * character map. Keys are Unicode character codes (integers).
	 *
	 * This functionality is partially covered by calling
	 * glyphNumbersForCharacters(getCoveredCharacters()), but this method is
	 * more efficient: it walks each segment once and builds the complete list
	 * instead of searching for the segment of every character code.
	 *
	 * @internal
	 * @return array Array representing <Unicode character code> => <glyph number> pairs.
	 */
	public function getCoveredCharactersGlyphs()
	{
		$glyphNumbers = array();

		for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
			$endCode = $this->_segmentTableEndCodes[$segmentNum];
			for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $endCode; $code++) {
				$glyphNumbers[$code] = $this->_glyphNumberInSegment($segmentNum, $code);
			}
		}

		return $glyphNumbers;
	}



	/* Object Lifecycle */

	/**
	 * Object constructor
	 *
	 * Parses the raw binary table data. Throws an exception if the table is
	 * malformed.
	 *
	 * @param string $cmapData Raw binary cmap table data.
	 * @throws Zend_Pdf_Exception
	 */
	public function __construct($cmapData)
	{
		/* Sanity check: The table should be at least 23 bytes in size.
		 */
		$actualLength = strlen($cmapData);
		if ($actualLength < 23) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception('Insufficient table data',
			Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
		}

		/* Sanity check: Make sure this is right data for this table type.
		 */
		$type = $this->_extractUInt2($cmapData, 0);
		if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception('Wrong cmap table type',
			Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
		}

		$length = $this->_extractUInt2($cmapData, 2);
		if ($length != $actualLength) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
			Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
		}

		/* Mapping tables should be language-independent. The font may not work
		 * as expected if they are not. Unfortunately, many font files in the
		 * wild incorrectly record a language ID in the field at offset 4, so
		 * that field is deliberately not read or validated here.
		 */

		/* These two values are stored premultiplied by two which is convenient
		 * when using the binary data directly, but we're parsing it out to
		 * native PHP data types, so divide by two.
		 */
		$this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
		$this->_searchRange  = $this->_extractUInt2($cmapData, 8) >> 1;

		$this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;

		/* Sanity check: Before reading the per-segment arrays, make sure the
		 * table is large enough to hold them. The arrays start at offset 14
		 * and consist of four 2-byte entries per segment plus 2 reserved
		 * bytes after the end codes.
		 */
		$minimumLength = 16 + ($this->_segmentCount * 8);
		if ($length < $minimumLength) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception(
			"Table length ($length) is too small for {$this->_segmentCount} segments",
			Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
		}

		$offset = 14;
		for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
			$this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
		}

		/* The recorded search range comes straight from the font and may not
		 * name an existing segment; fall back to 0 in that case.
		 */
		if (isset($this->_segmentTableEndCodes[$this->_searchRange])) {
			$this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
		} else {
			$this->_searchRangeEndCode = 0;
		}

		$offset += 2;    // reserved bytes

		for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
			$this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
		}

		for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
			$this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
		}

		/* The range offset helps determine the index into the glyph index array.
		 * Like the segment count and search range above, it's stored as a byte
		 * multiple in the font, so divide by two as we extract the values.
		 */
		for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
			$this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
		}

		/* The size of the glyph index array varies by font and depends on the
		 * extent of the usage of range offsets versus deltas. Some fonts may
		 * not have any entries in this array.
		 *
		 * Only complete 2-byte entries are read. A trailing odd byte leaves
		 * $offset short of $length and is reported by the check below.
		 */
		for (; $offset + 1 < $length; $offset += 2) {
			$this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
		}

		/* Sanity check: After reading all of the data, we should be at the end
		 * of the table.
		 */
		if ($offset != $length) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
			Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
		}
	}



	/**** Internal Methods ****/


	/**
	 * Resolves a single character code to its glyph number.
	 *
	 * Shared implementation behind {@link glyphNumberForCharacter()} and
	 * {@link glyphNumbersForCharacters()}.
	 *
	 * @param integer $characterCode Unicode character code (code point).
	 * @return integer Glyph number, or the missing character glyph when the
	 *   character is not covered by any segment.
	 */
	protected function _lookupGlyphNumber($characterCode)
	{
		/* These tables only cover the 16-bit character range.
		 */
		if ($characterCode > 0xffff) {
			return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
		}

		$segmentIndex = $this->_findSegmentIndex($characterCode);
		if ($segmentIndex == 0) {
			return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
		}

		return $this->_glyphNumberInSegment($segmentIndex, $characterCode);
	}

	/**
	 * Finds the segment that contains the character code.
	 *
	 * The segments are ordered from lowest-to-highest, so a binary search
	 * locates the first segment whose end code is greater than or equal to the
	 * character code. The search is bounded by the parsed segment count only;
	 * it does not use the search hints recorded in the table header, so
	 * inconsistent hints cannot move the probe outside the segment arrays.
	 *
	 * @param integer $characterCode Unicode character code (code point).
	 * @return integer Segment index (starting at 1), or 0 if no segment
	 *   contains the character code.
	 */
	protected function _findSegmentIndex($characterCode)
	{
		if ($this->_segmentCount < 1) {
			return 0;
		}

		$low  = 1;
		$high = $this->_segmentCount;
		while ($low < $high) {
			$middle = ($low + $high) >> 1;
			if ($this->_segmentTableEndCodes[$middle] >= $characterCode) {
				$high = $middle;
			} else {
				$low = $middle + 1;
			}
		}

		/* Every segment ends below the character code: nothing covers it.
		 */
		if ($this->_segmentTableEndCodes[$low] < $characterCode) {
			return 0;
		}

		/* If the segment's start code is greater than our character code,
		 * the code falls in the gap before this segment and is not
		 * represented in this font.
		 */
		if ($this->_segmentTableStartCodes[$low] > $characterCode) {
			return 0;
		}

		return $low;
	}

	/**
	 * Maps a character code to a glyph number using one specific segment.
	 *
	 * The caller is responsible for making sure the character code lies within
	 * the segment's start and end codes.
	 *
	 * @param integer $segmentIndex Segment index (starting at 1).
	 * @param integer $characterCode Unicode character code (code point).
	 * @return integer Glyph number.
	 */
	protected function _glyphNumberInSegment($segmentIndex, $characterCode)
	{
		$delta = $this->_segmentTableIdDeltas[$segmentIndex];

		if ($this->_segmentTableIdRangeOffsets[$segmentIndex] == 0) {
			/* This segment uses a simple mapping from character code to
			 * glyph number. The delta is signed, so the sum may be negative;
			 * masking (rather than using the % operator, which keeps the sign
			 * of the dividend) keeps the result in the range 0..65535.
			 */
			return ($characterCode + $delta) & 0xffff;
		}

		/* This segment relies on the glyph index array to determine the
		 * glyph number. The calculation below determines the correct
		 * index into that array. It's a little odd because the range
		 * offset in the font file is designed to quickly provide an
		 * address of the index in the raw binary data instead of the
		 * index itself. Since we've parsed the data into arrays, we
		 * must process it a bit differently.
		 */
		$glyphIndex = $characterCode
			- $this->_segmentTableStartCodes[$segmentIndex]
			+ $this->_segmentTableIdRangeOffsets[$segmentIndex]
			- $this->_segmentCount
			+ $segmentIndex
			- 1;

		/* A range offset that points outside the glyph index array cannot be
		 * resolved; treat the character as missing.
		 */
		if (!isset($this->_glyphIndexArray[$glyphIndex])) {
			return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
		}

		/* A stored value of 0 is returned unchanged. Any other value has the
		 * segment's delta added to it, again modulo 65536, as the cmap
		 * format 4 definition requires.
		 */
		$glyphNumber = $this->_glyphIndexArray[$glyphIndex];
		if ($glyphNumber != 0) {
			$glyphNumber = ($glyphNumber + $delta) & 0xffff;
		}
		return $glyphNumber;
	}

}
